{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "974"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import malaya\n",
    "from glob import glob\n",
    "from tqdm import tqdm\n",
    "import json\n",
    "import random\n",
    "\n",
    "# Collect the paths matching news/*.json and display how many were found.\n",
    "# NOTE(review): `news` is not referenced again in the cells visible here.\n",
    "news = glob('news/*.json')\n",
    "len(news)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['text', 'title'])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load populate-news.json; the recorded output of this cell shows the\n",
    "# top-level keys 'text' and 'title'.\n",
    "with open('populate-news.json') as fopen:\n",
    "    data = json.load(fopen)\n",
    "    \n",
    "# Bare last expression so the notebook displays the available keys.\n",
    "data.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Ikatan alumni bintara polisi angkatan ke-27 tahun 2005 gelombang II (ZLD) Polres Sekadau, melakukan aksi bakti sosial dengan membagikan paket sembako kepada warga kurang mampu dan panti asuhan. Sebanyak 25 paket sembako berisi beras, gula, tepung, minyak goreng dan mie instan diserahkan langsung secara door to door kepada warga yang sebelumnya telah didata, untuk menghindari kerumunan. Sasarannya adalah warga kurang mampu atau fakir miskin, lansia, warga terdampak pandemi covid-19 dan anak yatim piatu di panti asuhan Harapan Bunda dan Yayasan Filipi. Bripka Hisbullah Aji selaku koordinator aksi baksos mengatakan, ide pemberian bantuan sembako muncul dari rasa keprihatinan terhadap warga lantaran sulitnya ekonomi ditengah masa pandemi. Terlebih bagi warga yang belum menerima bantuan dari pemerintah. Anggota ZLD Polres Sekadau yang berjumlah 22 personel kemudian mengadakan iuran sukarela yang dikumpulkan sehingga aksi bakti sosial ini dapat terealisasi. Bahkan anggota ZLD yang sudah bertugas diluar Polres Sekadau pun turut andil menyisihkan sebagian rejekinya untuk berbagi dengan sesama. Hal yang mendasari adalah rasa cinta dengan Kota Sekadau, dengan masyarakatnya yang hidup rukun aman dan damai. Dimana angkatan ZLD pertama kali ditugaskan dari Polda Kalbar pada tahun 2006, dan ikut andil membangun Polres Sekadau yang saat itu masih merupakan Kabupaten baru. “Semboyan kami yaitu ‘kami memang tidak Sedarah, tapi lebih dari Saudara’. Kekompakan dan rasa kemanusiaan kami tetap terjalin meskipun jarak berjauhan,” kata Bripka Aji. “Sekadau adalah tempat dimana sejak awal kami bertugas dan hidup berdampingan dengan masyarakat. Sudah selayaknya kami berbagi, meskipun belum seberapa namun besar harapan kami bantuan diberikan bisa tepat sasaran dan meringankan beban warga,” pungkasnya. (Mus)'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['text'][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from malaya.text.vectorizer import SkipGramCountVectorizer\n",
    "\n",
    "# Stopword list from malaya, fetched once and shared by both vectorizers below.\n",
    "stopwords = malaya.text.function.get_stopwords()\n",
    "\n",
    "# Plain n-gram counter (unigrams to trigrams); case is preserved (lowercase = False).\n",
    "bow = CountVectorizer(\n",
    "    ngram_range = (1, 3),\n",
    "    stop_words = stopwords,\n",
    "    lowercase = False,\n",
    ")\n",
    "\n",
    "# Same settings for malaya's skip-gram vectorizer, with skip = 2.\n",
    "# NOTE(review): `skip_bow` is not used by any cell visible here.\n",
    "skip_bow = SkipGramCountVectorizer(\n",
    "    ngram_range = (1, 3),\n",
    "    stop_words = stopwords,\n",
    "    lowercase = False,\n",
    "    skip = 2\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pronouns (I / you / we); any sentence containing one of these as a substring is dropped.\n",
    "rejected = ['saya', 'awak', 'kami']\n",
    "\n",
    "def headline(string, length = 300):\n",
    "    \"\"\"Build a short lead from the opening sentences of `string`.\n",
    "\n",
    "    Sentences containing any entry of `rejected` are skipped. The remaining\n",
    "    sentences are taken in order for as long as the characters collected so\n",
    "    far do not exceed `length`, so the result can overshoot `length` by up to\n",
    "    one sentence. The chosen sentences are joined with single spaces.\n",
    "    \"\"\"\n",
    "    sentences = malaya.text.function.split_into_sentences(string)\n",
    "    picked, n_chars = [], 0\n",
    "    for sentence in sentences:\n",
    "        # Budget is checked before adding, mirroring a `<= length` loop guard.\n",
    "        if n_chars > length:\n",
    "            break\n",
    "        if any(word in sentence for word in rejected):\n",
    "            continue\n",
    "        picked.append(sentence)\n",
    "        # Joining spaces are deliberately not counted towards the budget.\n",
    "        n_chars += len(sentence)\n",
    "    return ' '.join(picked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Ikatan alumni bintara polisi angkatan ke-27 tahun 2005 gelombang II (ZLD) Polres Sekadau, melakukan aksi bakti sosial dengan membagikan paket sembako kepada warga kurang mampu dan panti asuhan. Sebanyak 25 paket sembako berisi beras, gula, tepung, minyak goreng dan mie instan diserahkan langsung secara door to door kepada warga yang sebelumnya telah didata, untuk menghindari kerumunan.'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "headline(data['text'][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "def loop(texts):\n",
    "    \"\"\"Worker: build (keywords, keywords_rake, headline) for each text in a chunk.\n",
    "\n",
    "    `texts` arrives wrapped in an outer sequence and only element 0 is used\n",
    "    (presumably how `mp.multiprocessing` hands over each chunk - TODO confirm).\n",
    "\n",
    "    For every text, RAKE keywords are extracted twice: once with the `bow`\n",
    "    vectorizer and once with malaya's default settings, each with a random\n",
    "    top_k between 5 and 10 inclusive. Only element 1 of each RAKE item is kept.\n",
    "\n",
    "    Texts that raise are skipped on purpose (best effort), but the number of\n",
    "    skips and the last error are reported through `warnings.warn` instead of\n",
    "    being dropped silently.\n",
    "\n",
    "    Returns a list of `(keywords, keywords_rake, headline)` tuples.\n",
    "    \"\"\"\n",
    "    import warnings\n",
    "\n",
    "    texts = texts[0]\n",
    "    results = []\n",
    "    skipped, last_error = 0, None\n",
    "    for text in tqdm(texts):\n",
    "        try:\n",
    "            h = headline(text)\n",
    "            keywords = malaya.keyword_extraction.rake(text, \n",
    "                                                      vectorizer = bow, \n",
    "                                                      top_k = random.randint(5, 10))\n",
    "            keywords = [k[1] for k in keywords]\n",
    "            keywords_rake = malaya.keyword_extraction.rake(text,\n",
    "                                                      top_k = random.randint(5, 10))\n",
    "            keywords_rake = [k[1] for k in keywords_rake]\n",
    "            results.append((keywords, keywords_rake, h))\n",
    "        except Exception as e:\n",
    "            # Catch Exception only, so KeyboardInterrupt / SystemExit still stop the worker.\n",
    "            skipped += 1\n",
    "            last_error = e\n",
    "\n",
    "    if skipped:\n",
    "        warnings.warn('loop: skipped %d of %d texts; last error: %r'\n",
    "                      % (skipped, len(texts), last_error))\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 63%|██████▎   | 6615/10448 [05:17<02:02, 31.26it/s]] \n",
      "100%|██████████| 7/7 [00:00<00:00, 34.55it/s]04it/s]]\n",
      "100%|██████████| 10448/10448 [05:21<00:00, 32.47it/s]\n",
      "100%|██████████| 10448/10448 [05:30<00:00, 31.60it/s]\n",
      "100%|██████████| 10448/10448 [07:32<00:00, 23.07it/s]\n",
      "100%|██████████| 10448/10448 [07:33<00:00, 23.03it/s]\n",
      "100%|██████████| 10448/10448 [07:38<00:00, 22.77it/s]\n",
      "100%|██████████| 10448/10448 [07:45<00:00, 22.46it/s]\n",
      "100%|██████████| 10448/10448 [07:46<00:00, 22.40it/s]\n",
      "100%|██████████| 10448/10448 [07:58<00:00, 21.86it/s]\n",
      "100%|██████████| 10448/10448 [07:57<00:00, 21.87it/s]\n",
      "100%|██████████| 10448/10448 [08:02<00:00, 21.63it/s]\n",
      "100%|██████████| 10448/10448 [08:05<00:00, 21.52it/s]\n",
      "100%|██████████| 10448/10448 [08:10<00:00, 21.31it/s]\n",
      "100%|██████████| 10448/10448 [08:24<00:00, 20.72it/s]\n",
      "100%|██████████| 10448/10448 [08:35<00:00, 20.29it/s]\n"
     ]
    }
   ],
   "source": [
    "# NOTE(review): `mp` is not a standard-library module; presumably a local helper\n",
    "# that splits data['text'] into chunks and runs `loop` on each one - confirm.\n",
    "# The recorded run shows progress bars of 10448 items each, finishing in about 5-9 minutes.\n",
    "import mp\n",
    "results = mp.multiprocessing(data['text'], loop, cores = 15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['ZLD Polres Sekadau',\n",
       "  'Polres Sekadau aksi',\n",
       "  'sembako warga panti',\n",
       "  'Sekadau aksi bakti',\n",
       "  'warga panti asuhan'],\n",
       " ['Ikatan alumni bintara polisi angkatan ke-27',\n",
       "  'warga lantaran sulitnya ekonomi ditengah',\n",
       "  'ide pemberian bantuan sembako muncul',\n",
       "  '25 paket sembako berisi beras',\n",
       "  'panti asuhan Harapan Bunda'],\n",
       " 'Ikatan alumni bintara polisi angkatan ke-27 tahun 2005 gelombang II (ZLD) Polres Sekadau, melakukan aksi bakti sosial dengan membagikan paket sembako kepada warga kurang mampu dan panti asuhan. Sebanyak 25 paket sembako berisi beras, gula, tepung, minyak goreng dan mie instan diserahkan langsung secara door to door kepada warga yang sebelumnya telah didata, untuk menghindari kerumunan.')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the extracted results; json.dump writes each result tuple as a JSON array.\n",
    "with open('keywords-headline.json', 'w') as fopen:\n",
    "    json.dump(results, fopen)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
